# Replication materials ---------------------------------------------------
# Paper:    Campaigns and the Selection of Policy-seeking Representatives.
#           Legislative Studies Quarterly
# Authors: Shaun Bowler, Gail McElroy, Stefan Müller

#--------------------------------------------------------------------------
# Script for creating master data (European Parliament 7, 2009-2014)
#--------------------------------------------------------------------------

# Set the working directory

# Load necessary packages -------------------------------------------------

library(foreign) # For reading Stata files (.dta)
library(haven)
library(readstata13)
library(car)
library(tidyverse)

# Import files into R environment -----------------------------------------

# Candidate survey (Stata file)
# NOTE: THIS FILE CANNOT BE INCLUDED IN THE REPLICATION MATERIAL
EP7_Survey_2010_final <- foreign::read.dta(
  file = "data_ep_2009-2014/Survey2010_ID_final.dta"
)

# Experience/incumbent indicator derived from q1_5b:
# values 1 and 2 become 0, every other value becomes 1.
# NOTE(review): car::recode's `else` also applies to missing answers, so an
# NA in q1_5b ends up as 1 - confirm this is intended.
EP7_Survey_2010_final[["incumbent"]] <- car::recode(
  EP7_Survey_2010_final[["q1_5b"]],
  recodes = "1:2=0; else=1"
)

# Flag every row of the survey file as a respondent (one-level factor "1");
# rows without a survey match can be told apart after the later merge.
EP7_Survey_2010_final[["respondent"]] <- factor(1)

# Read MEP activity file
EP7 <- read.dta13("data_ep_2009-2014/ep_7_activity.dta")

# Read file with positions in EP 7 of each MEP
EP7_Positions <- read.csv("data_ep_2009-2014/ep_7_positions.csv")

# Recode positions into numeric 0/1 indicators.
# car::recode() returns a factor whenever its input is a factor (which is what
# read.csv() produces for text columns when strings are converted to factors,
# the default before R 4.0.0). as.numeric() applied directly to such a factor
# returns the internal level codes (1/2) instead of the labels (0/1), so the
# result is passed through as.character() first. With character input the
# outcome is the same as before.

# committee_chair: 0 for 'Member', 1 for every other value of commpos
EP7_Positions$committee_chair <- as.numeric(as.character(
  car::recode(EP7_Positions$commpos, "'Member'=0; else=1")
))

# partypos: 0 for 'member', NA for empty strings, 1 for every other value
EP7_Positions$partypos <- as.numeric(as.character(
  car::recode(EP7_Positions$partypos, "'member'=0; ''=NA; else=1")
))


# Electoral systems used in the 2009 election (EP7)
EP7electoralsystems <- utils::read.csv(
  file = "data_ep_2009-2014/ep_7_electoralsystems.csv"
)

# Coding of "electoral_system04": 1 = closed lists; 2 = preferential; 3 = STV
# Northern Ireland (northernireland) is treated separately because it uses STV
# while the rest of the UK uses closed lists.

# The MEPs from Northern Ireland included in the dataset were changed
# from "United Kingdom" to "northern ireland" in the "Country" variable.

# Party group seats
EP7partygroups <- utils::read.csv(
  file = "data_ep_2009-2014/ep_7_partygroups.csv"
)

# Build the master table step by step. Every step is a left join, so all rows
# of the left-hand table are kept and unmatched rows receive NA in the new
# columns. Each intermediate result is stored under its own name.

# 1. MEP activity data + survey answers (key: epid)
EP7_Survey2010_ID_merged <- EP7 %>%
  left_join(EP7_Survey_2010_final, by = "epid")

# 2. + electoral system (key: country)
# NOTE(review): this join uses `country`, while `Country` is copied into
# `country` later in the script - confirm both carry the same country coding.
EP7_Survey2010_ID_electoralsystems_merged <- EP7_Survey2010_ID_merged %>%
  left_join(EP7electoralsystems, by = "country")

# 3. + party group seats (key: group)
EP7_Survey2010_ID_electoralsystems_partygroups_merged <-
  EP7_Survey2010_ID_electoralsystems_merged %>%
  left_join(EP7partygroups, by = "group")

# 4. + positions held in EP 7 (key: epid)
EP7_Survey2010_ID_electoralsystems_partygroups_position_merged <-
  EP7_Survey2010_ID_electoralsystems_partygroups_merged %>%
  left_join(EP7_Positions, by = "epid")

# 5. + second MEP activity file (key: name_merge)
EP7_Activity_final <- read.csv("data_ep_2009-2014/ep_7_activity_2.csv")

EP7_Survey_2010 <-
  EP7_Survey2010_ID_electoralsystems_partygroups_position_merged %>%
  dplyr::left_join(EP7_Activity_final, by = "name_merge")


# Create campaign index ---------------------------------------------------

# The survey items are recoded group by group. Within each group the i-th new
# column is the recoded version of the i-th survey item listed below it.

# Classic campaigning: values 1-3 become 1, value 4 becomes 0
# (presumably 4 = "not used" - confirm against the questionnaire)
EP7_Survey_2010[c("use_telephone", "use_doortodoor", "use_mailing",
                  "use_partymeetings", "use_publicmeetings",
                  "use_pressconferences", "use_mediarelations",
                  "use_fundraising")] <- unname(lapply(
  EP7_Survey_2010[c("q9_6_1", "q9_6_2", "q9_6_3",
                    "q9_6_4", "q9_6_5",
                    "q9_6_6", "q9_6_7",
                    "q9_6_11")],
  car::recode,
  recodes = "1:3=1; 4=0"
))

# Promotion material: the recode "1=1" maps 1 to itself and leaves all other
# values as they are, so these columns carry the survey coding over unchanged
EP7_Survey_2010[c("use_newsletters", "use_posters", "use_stickers",
                  "use_radioads", "use_tvads", "use_newspaperads",
                  "use_sms")] <- unname(lapply(
  EP7_Survey_2010[c("q9_7_1", "q9_7_2", "q9_7_5",
                    "q9_7_6", "q9_7_7", "q9_7_8",
                    "q9_7_9")],
  car::recode,
  recodes = "1=1"
))

# Internet campaigning: same recode as the classic items
EP7_Survey_2010[c("use_website", "use_email", "use_blog",
                  "use_socialmedia")] <- unname(lapply(
  EP7_Survey_2010[c("q9_6_8", "q9_6_9", "q9_6_10",
                    "q9_6_12")],
  car::recode,
  recodes = "1:3=1; 4=0"
))


# Create additive indices

# Classic campaigning: row-wise sum of the eight indicators. With
# na.rm = TRUE missing items are skipped, so a row with no answers sums to 0.
EP7_Survey_2010$use_classic <- rowSums(
  EP7_Survey_2010[c("use_telephone", "use_doortodoor", "use_partymeetings",
                    "use_mailing", "use_publicmeetings",
                    "use_pressconferences", "use_mediarelations",
                    "use_fundraising")],
  na.rm = TRUE
)

# Promotion material: with na.rm = FALSE a single missing item makes the
# whole sum NA. The result is stored as a plain numeric vector.
EP7_Survey_2010$use_material <- as.numeric(rowSums(
  EP7_Survey_2010[c("use_newsletters", "use_posters", "use_stickers",
                    "use_radioads", "use_tvads", "use_newspaperads",
                    "use_sms")],
  na.rm = FALSE
))

# Internet ("postmodern") campaigning: again NA if any item is missing
EP7_Survey_2010$use_postmodern <- rowSums(
  EP7_Survey_2010[c("use_website", "use_email", "use_blog",
                    "use_socialmedia")],
  na.rm = FALSE
)

# Overall campaign index: sum of the three sub-indices. Because na.rm = TRUE
# is used here, a sub-index that is NA contributes 0 to the total.
# NOTE(review): the sub-indices treat missing items differently (see above) -
# confirm this mix is intended.
EP7_Survey_2010$use_campaign <- rowSums(
  EP7_Survey_2010[c("use_classic", "use_postmodern", "use_material")],
  na.rm = TRUE
)


# Recode variables for ordinal index --------------------------------------

# Reverse the 1-4 answer scale so that higher numbers mean a higher answer
# category in reverse order: 4 -> 0, 3 -> 1, 2 -> 2, 1 -> 3. The i-th new
# column is the recoded version of the i-th survey item listed below it.
# NOTE(review): q9_6_9 was labelled "Direct mail" in the original comment but
# is stored as use_email in the binary recodes - confirm the item wording.
# NOTE(review): q9_6_12 (social media in the binary recodes) has no ordinal
# counterpart here - confirm this omission is intended.
EP7_Survey_2010[c("use_ord_telephone", "use_ord_doortodoor",
                  "use_ord_mailing", "use_ord_partymeetings",
                  "use_ord_publicmeetings", "use_ord_pressconferences",
                  "use_ord_mediarelations", "use_ord_fundraising",
                  "use_ord_website", "use_ord_mail",
                  "use_ord_blog")] <- unname(lapply(
  EP7_Survey_2010[c("q9_6_1", "q9_6_2",
                    "q9_6_3", "q9_6_4",
                    "q9_6_5", "q9_6_6",
                    "q9_6_7", "q9_6_11",
                    "q9_6_8", "q9_6_9",
                    "q9_6_10")],
  car::recode,
  recodes = "4=0; 3=1; 2=2; 1=3"
))

# Create additive index (ordinal): row-wise sum of the eleven items, skipping
# missing items (a row with no answers therefore sums to 0)
EP7_Survey_2010$use_ord_total <- rowSums(
  EP7_Survey_2010[c("use_ord_telephone", "use_ord_doortodoor",
                    "use_ord_partymeetings", "use_ord_mailing",
                    "use_ord_publicmeetings", "use_ord_pressconferences",
                    "use_ord_mediarelations", "use_ord_fundraising",
                    "use_ord_website", "use_ord_mail", "use_ord_blog")],
  na.rm = TRUE
)


# Recode and generate legislative activity variables ----------------------

# Copy the raw activity columns to the names used in the analysis. The i-th
# name on the left receives the i-th column on the right. debate_speeches is
# carried along but is not used in the index.
EP7_Survey_2010[c("vote_activity",
                  "questions",
                  "questions_qtime",
                  "reports",
                  "opinions",
                  "motions",
                  "debate_speeches")] <- list(
  EP7_Survey_2010$rollcall,
  EP7_Survey_2010$parliamentaryquestions,
  EP7_Survey_2010$qt,
  EP7_Survey_2010$reports.x,
  EP7_Survey_2010$opinions.x,
  EP7_Survey_2010$motionsforresolutionssigned,
  EP7_Survey_2010$debate_speech
)

# Additive index of legislative activity: row-wise sum of five of the counts
# (roll-call votes and debate speeches are left out); missing values are
# skipped
EP7_Survey_2010$leg_activity <- rowSums(
  EP7_Survey_2010[c("questions", "questions_qtime", "reports",
                    "opinions", "motions")],
  na.rm = TRUE
)


# Create same index, but this time divide it by the number of days
# This part derives the length of service (in days and in months) from the
# start and end dates; the per-month activity measures are computed from it
# further below.

# Transform Startdate and Enddate to date format
EP7_Survey_2010$start <- as.Date(EP7_Survey_2010$Startdate)

# Inspect the distribution of start dates (output is only shown when the
# script is run interactively or echoed)
summary(EP7_Survey_2010$start)
# One observation is coded as 1960-01-02 as startdate which is clearly wrong.
# Instead it has to be "2009-07-20"
EP7_Survey_2010$start[EP7_Survey_2010$start=="1960-01-02"] <- "2009-07-20"

EP7_Survey_2010$end <- as.Date(EP7_Survey_2010$enddate)

# There is one observation with a value of "-1" which is not a possible value.
# However, actually this MEP entered the parliament on "2009-07-14"/"2009-07-20"
# Recode this observation
# NOTE(review): the statement below resets the start date of every row whose
# start is 2012-01-19; the "-1" mentioned above does not appear in the
# condition - confirm which observation and which variable are meant.
EP7_Survey_2010$start[EP7_Survey_2010$start=="2012-01-19"] <- "2009-07-20"

# Calculate number of days served in EP7
# (difference between the two Date columns, converted to a plain number)
EP7_Survey_2010$days_served <- as.numeric(EP7_Survey_2010$end - EP7_Survey_2010$start)


# Convert days to months using 30.4 days per month
# (presumably the average month length, 365 / 12 - confirm)
EP7_Survey_2010$months_served <- EP7_Survey_2010$days_served / 30.4

# Calculate legislative activity again, but this time divide it by months served

# Each activity count is divided by months_served. The i-th name on the left
# receives the rate computed from the i-th column in the list.
EP7_Survey_2010[c("vote_activity_month",
                  "questions_month",
                  "questions_qtime_month",
                  "reports_month",
                  "opinions_month",
                  "motions_month")] <- lapply(
  list(
    EP7_Survey_2010$rollcall,
    EP7_Survey_2010$questions,
    EP7_Survey_2010$qt,
    EP7_Survey_2010$reports.x,
    EP7_Survey_2010$opinions.x,
    EP7_Survey_2010$motionsforresolutionssigned
  ),
  function(activity_count) activity_count / EP7_Survey_2010$months_served
)

# Debate speeches (not used in the index).
# NOTE(review): unlike the *_month variables this is divided by days served,
# and it replaces any existing debate_speeches column (the raw count) with
# the per-day rate - confirm both are intended.
EP7_Survey_2010[["debate_speeches"]] <-
  EP7_Survey_2010$debate_speech / EP7_Survey_2010$days_served

# Create additive index: row-wise sum of the five monthly rates, skipping
# missing values
EP7_Survey_2010$leg_activity_month <- rowSums(
  EP7_Survey_2010[c("questions_month", "questions_qtime_month",
                    "reports_month", "opinions_month", "motions_month")],
  na.rm = TRUE
)

# Rename variables: copy Country and group_short to the names used later on
# (the source columns are kept; `country` is overwritten if it already exists)
EP7_Survey_2010[c("country", "party_group")] <- list(
  EP7_Survey_2010$Country,
  EP7_Survey_2010$group_short
)

# Age squared
EP7_Survey_2010[["age_squared"]] <- EP7_Survey_2010$age^2

# "female" is the gender variable converted to a factor
# NOTE(review): the levels are whatever values `gender` holds - confirm which
# level stands for female before using it as a dummy
EP7_Survey_2010[["female"]] <- factor(EP7_Survey_2010$gender)

# Respondent flag: MEPs without a survey match have NA after the merge;
# recode those to 0 and keep 1 for survey respondents
EP7_Survey_2010[["respondent"]] <- car::recode(
  EP7_Survey_2010$respondent,
  recodes = "1=1; NA=0"
)

# Keep variables needed for regressions ---
# Columns are kept in the order listed. debate_speeches is named once: naming
# a column a second time in select() does not add another copy of it.
EP7_Survey_2010_subset <- EP7_Survey_2010 %>%
  dplyr::select(
    # MEP, country and positions
    fullname, incumbent, respondent, country, committee_chair, partypos,
    # electoral system and party group
    open_list, prop_rep, group, party_group, party_publish,
    seats, seat_share, seat_share_others0,
    # campaign tools: classic
    use_telephone, use_doortodoor, use_partymeetings, use_mailing,
    use_publicmeetings, use_pressconferences, use_mediarelations,
    use_fundraising,
    # campaign tools: internet
    use_website, use_email, use_blog, use_socialmedia,
    # campaign tools: promotion material
    use_newsletters, use_posters, use_stickers, use_radioads, use_tvads,
    use_newspaperads, use_sms,
    # additive campaign indices
    use_campaign, use_classic, use_postmodern, use_material,
    # ordinal campaign items and index
    use_ord_telephone, use_ord_doortodoor, use_ord_partymeetings,
    use_ord_mailing, use_ord_publicmeetings, use_ord_pressconferences,
    use_ord_mediarelations, use_ord_fundraising, use_ord_website,
    use_ord_mail, use_ord_blog, use_ord_total,
    # legislative activity
    vote_activity, questions, questions_qtime, reports, opinions, motions,
    debate_speeches, leg_activity,
    # legislative activity per month served
    months_served, vote_activity_month, questions_month,
    questions_qtime_month, reports_month, opinions_month, motions_month,
    leg_activity_month,
    # demographics
    age, age_squared, female
  )

# Save this object as a csv file which can be used to replicate all regressions ---

# Add two rounded-up outcome variables (ceiling, i.e. always rounded upwards)
# and `partygroup` as a second name for party_group
EP_7_final <- EP7_Survey_2010_subset %>%
  mutate(
    leg_activity_month_round = ceiling(leg_activity_month),
    partygroup = party_group,
    reports_round = ceiling(reports)
  )

# Original remark at this point: "excludes 4 MEPs who are coded wrong".
# NOTE(review): no rows are dropped in this step - confirm where that
# exclusion takes place.
write_csv(EP_7_final, "EP7_final.csv")

